import os
import gzip

# Genome assembly name; used below to build both the input subdirectory
# and the "<assembly>.rmsk.txt.gz" input file name.
assembly = "hg38"

def timestamp(filename):
    """Return the last-modification date of *filename* as ``YYYY.MM.DD``.

    The modification time reported by the file system is converted to the
    local time zone before formatting.
    """
    # Imported locally so the function does not rely on module-level names.
    import os
    import time

    modified = time.localtime(os.path.getmtime(filename))
    return time.strftime("%Y.%m.%d", modified)


# Extract the tRNA entries from the gzipped RepeatMasker table of the chosen
# assembly and write them to "tRNA.gff" in the current working directory.
directory = "/osc-fs_home/mdehoon/Data/RepeatMasker/"
filename = "%s.rmsk.txt.gz" % assembly
path = os.path.join(directory, assembly, filename)
# Modification date of the input table; written to the output header as the
# version of the data source.
version_date = timestamp(path)
source = "UCSC:RepeatMasker"

print("Reading", path)
# Context managers guarantee both files are closed even if a row fails to
# parse part-way through the table.
with gzip.open(path, 'rt') as stream:
    filename = "tRNA.gff"
    print("Writing", filename)
    with open(filename, 'w') as output:
        output.write("##source-version %s %s\n" % (source, version_date))

        for line in stream:
            if line.startswith("#"):
                continue
            words = line.split()
            # Filter on the repeat class first so coordinates are only
            # parsed for the rows that are actually written.
            repClass = words[11]
            if repClass != 'tRNA':
                continue
            chromosome = words[5]
            # The start coordinate is shifted by one; presumably this converts
            # a 0-based start in the input table to the 1-based start used by
            # GFF -- confirm against the table schema.
            start = int(words[6]) + 1
            end = int(words[7])
            strand = words[9]
            repName = words[10]
            # Nine tab-separated columns; score and the last two columns are
            # left as ".".
            record = "%s\tRepeatMasker\t%s|%s\t%d\t%d\t.\t%s\t.\t.\n" % (
                chromosome, repClass, repName, start, end, strand)
            output.write(record)
